library(vroom)
## Warning: package 'vroom' was built under R version 4.0.4
    library(tidyverse)
## Registered S3 methods overwritten by 'readr':
##   method           from 
##   format.col_spec  vroom
##   print.col_spec   vroom
##   print.collector  vroom
##   print.date_names vroom
##   print.locale     vroom
##   str.col_spec     vroom
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2     √ purrr   0.3.4
## √ tibble  3.0.4     √ dplyr   1.0.2
## √ tidyr   1.1.2     √ stringr 1.4.0
## √ readr   1.4.0     √ forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x readr::col_character()  masks vroom::col_character()
## x readr::col_date()       masks vroom::col_date()
## x readr::col_datetime()   masks vroom::col_datetime()
## x readr::col_double()     masks vroom::col_double()
## x readr::col_factor()     masks vroom::col_factor()
## x readr::col_guess()      masks vroom::col_guess()
## x readr::col_integer()    masks vroom::col_integer()
## x readr::col_logical()    masks vroom::col_logical()
## x readr::col_number()     masks vroom::col_number()
## x readr::col_skip()       masks vroom::col_skip()
## x readr::col_time()       masks vroom::col_time()
## x readr::cols()           masks vroom::cols()
## x readr::default_locale() masks vroom::default_locale()
## x dplyr::filter()         masks stats::filter()
## x readr::fwf_empty()      masks vroom::fwf_empty()
## x readr::fwf_positions()  masks vroom::fwf_positions()
## x dplyr::lag()            masks stats::lag()
## x readr::locale()         masks vroom::locale()
## x readr::output_column()  masks vroom::output_column()
## x readr::problems()       masks vroom::problems()
    library(janitor)
## Warning: package 'janitor' was built under R version 4.0.4
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
    library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
    library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
## The following object is masked from 'package:vroom':
## 
##     col_factor
    Drive <- vroom("eng317.csv")
## New names:
## * `` -> ...5
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 27,100
## Columns: 39
## Delimiter: ","
## chr [ 7]: group, license plate number, date, log_gasoline_1, log_co2_1, log_acc_1, log_dea...
## dbl [31]: ranking, alert, age, gender, accident, driving experience, gasoline consumption,...
## lgl [ 1]: ...5
## 
## Use `spec()` to retrieve the guessed column specification
## Pass a specification to the `col_types` argument to quiet this message
    factors <- c("group", "ranking", "alert", "gender","accident", "group", "app_usage")
    numerics <- c("log_co2_1", "log_acc_1", "log_gasoline_1", "log_deacc_1")
    
    Drive_clean <- Drive %>%
        janitor::clean_names() %>%# get rid of some `` and underscore problems
        rename(license = license_plate_number) %>% 
        mutate(license = na_if(license, "1.22E+16"),
               log_co2_1 = na_if(log_co2_1, "#NULL!"),
               log_acc_1 = na_if(log_acc_1, "#NULL!"),
               log_gasoline_1 = na_if(log_gasoline_1, "#NULL!"),
               log_deacc_1 = na_if(log_deacc_1, "#NULL!")) %>%
        filter(!is.na(license)) %>% # without license plate no car identifier
        transform(driver_id = as.numeric(factor(license))) %>% # add id column
        mutate(group = str_extract(group, '(?<=^.{1}).')) %>%
        mutate(date = lubridate::mdy(date)) %>% # convert to Date type
        mutate(across(all_of(factors), as.factor)) %>%
        mutate(across(all_of(numerics), as.numeric)) %>%
        relocate(where(is.numeric), .after = where(is.character)) %>%
        relocate(starts_with("log"), .after = last_col()) %>%
        relocate(driver_id, .before = 1) %>%
        relocate(driving_score, .before = 2) %>% # research focus near the front
        relocate(date, .after = 2) %>%
        relocate(app_usage, .before = 2) # dependent variable at the front
## Warning in FUN(X[[i]], ...): strings not representable in native encoding will
## be translated to UTF-8
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C4>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00D6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E4>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00F6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00DF>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00D8>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00F8>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C5>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E5>' to native encoding
## Warning: Problem with `mutate()` input `date`.
## i  20038 failed to parse.
## i Input `date` is `lubridate::mdy(date)`.
## Warning: 20038 failed to parse.
    Drive_Anhui <- Drive_clean %>%
        filter(str_detect(license, "皖"))
    
    plotly::ggplotly( # For some reasons doesn't knit, but executes fine
        Drive_Anhui %>%
            filter(mileage > 0) %>%
            select(license, rapid_acceleration, rapid_deacceleration, date) %>%
            mutate(month = month(date)) %>%
            group_by(license) %>%
            filter(n() > 50 & rapid_acceleration < 500) %>%
            # at least 30 data points over the year
            ggplot(aes(x=rapid_acceleration, y=rapid_deacceleration)) +
            geom_point(mapping = aes(color = month))  +
            geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
            facet_wrap(~license) +
            ggtitle("Threshold-measurement based profiling: Acceleration vs Deacceleration") +
            xlab("Rapid acceleration penalty count") +
            ylab("Rapid deacceleration penalty count")
    )
#library("knitr")
#install.packages("devtools")
#install_github("plotly", "ropensci")
#library("devtools")
#library(plotly)
#p

Sys.getlocale()
## [1] "LC_COLLATE=Chinese (Simplified)_China.936;LC_CTYPE=Chinese (Simplified)_China.936;LC_MONETARY=Chinese (Simplified)_China.936;LC_NUMERIC=C;LC_TIME=Chinese (Simplified)_China.936"